/*

02_sample_descriptives.do

Purpose: RD and difference-in-differences regressions for the main results table (details in the comment block below)
Inputs: student-level-panel
Outputs: rdd-regressions-main-excl_hsresident, rdd-regressions-main-excl_hsresident-note
	
*/

/*
	RDD regressions on pre-treatment, market and first- and second-stage outcomes
	Running variable: Application risk of non-placement
	
*/

// ---------------------------------------------------------------------------
// Outcome lists: each global feeds one panel of the table
// ---------------------------------------------------------------------------
global pretreatment       "female black hispanic white"
global simulator_outcomes "warnings_email any_login login_count any_sim"
// NOTE(review): outcomes_sec_stage is not defined in this section; it is
// expanded here, at definition time, so it must already be set by the caller
// (otherwise it contributes nothing to the list).
global choice_outcomes    "$outcomes_sec_stage reduce_real_risk reduce_sim_risk"

// ---------------------------------------------------------------------------
// Right-hand-side terms, keyed by method (rdd / did)
// ---------------------------------------------------------------------------
global rdd_vars "app_risk app_risk_x_above"
global did_vars "post i.i_riskq"

// Control sets are intentionally empty in this specification
foreach ctrl in full_controls_rdd2020 part_controls_rdd2020 ///
		full_controls_rdd2019 part_controls_rdd2019 ///
		full_controls_did {
	global `ctrl' ""
}

// Coefficient reported for each method
global coeff_rdd "above"
global coeff_did "post_x_above"

// Samples that get a column pair in the table, in column order
global samples "rdd2020 did"

// Load the student-level panel
use "$int/student-level-panel", clear

// Risk-bin index (icodes 0-9) built from app_risk as stored on disk.
// Any non-missing app_risk that cut() leaves unbinned (e.g. app_risk == 1,
// the top cutpoint) is folded into the top bin.
// NOTE(review): this uses app_risk BEFORE the 2019 replacement further down,
// so 2019 bins are not based on risk_final_initial - confirm this is intended.
egen i_riskq = cut(app_risk), at(0(.1)1) icodes
replace i_riskq = 9 if missing(i_riskq) & !missing(app_risk)

// Flag used later to restrict the estimation sample (missing hs/resident -> 0)
gen excl_hsresident = (hs == 0) | (resident == 0)

// Keep applicants with simulator-eligible applications and a baseline record
// (per the original comment: simulator eligible apps in 2019 and 2020)
keep if simulator_app_eligible == 1
keep if any_baseline == 1

// 2020 applicants without a treatment assignment: these listed new schools
// that were not assigned to treatment
drop if year == 2020 & missing(treat)

// Sample indicators (names must match the entries of $samples) and DiD period
gen rdd2020 = (year == 2020)
gen rdd2019 = (year == 2019)
gen did = 1
gen post = (year == 2020)

gen warnings_treatment = (treat == 3)

// RD regressions exclude the extremes of the risk distribution; evaluated on
// the uncentred app_risk, before the 2019 replacement below. The DiD sample
// is unrestricted.
gen samp_cond_rdd = (app_risk < 0.99) & (app_risk > 0.01)
gen samp_cond_did = 1

// For 2019 the running variable is taken from risk_final_initial
replace app_risk = risk_final_initial if year == 2019

// Centre the running variable at the 0.5 cutoff and build the RD terms
replace app_risk = app_risk - 0.5
gen above = (app_risk >= 0) if !missing(app_risk)
gen post_x_above = above * post if !missing(above)
gen app_risk_x_above = (above == 1) * app_risk if !missing(app_risk)
gen app_risk_x_below = (above == 0) * app_risk if !missing(app_risk)

// Indicators for any reduction in realized / simulated risk.
// NOTE(review): a missing difference evaluates to 0 here (Stata treats
// missing as larger than any number) - confirm that is the intended coding.
gen reduce_real_risk = (risk_real_diff < 0)
gen reduce_sim_risk = (risk_sim_diff < 0)

// Row labels for the table. The "_ever" and "_final" versions of each
// application-change outcome share the same label.
foreach stage in ever final {
	label var change_school_`stage' "Change school"
	label var lengthen_app_`stage'  "Lengthen app."
	label var new_insert_`stage'    "Insert new school"
	label var new_append_`stage'    "Append new school"
	label var shorten_app_`stage'   "Shorten app."
	label var modify_`stage'        "Change length or school"
}

// Risk and placement outcomes
label var risk_real_diff   "Diff. in realized risk"
label var risk_sim_diff    "Diff. in simulated risk"
label var placed_any       "Any placement"
label var reduce_real_risk "Any realized risk reduction"
label var reduce_sim_risk  "Any simulated risk reduction"

// write_rows: write one table panel - a row of estimates per outcome,
// followed by a row of sample sizes.
//
// Positional arguments:
//   1  space-separated list of outcome variables (one table row each)
//   2  space-separated list of samples to estimate; samples in $samples that
//      are not listed here get blank cells
//   3  estimator: "ols" or "iv"
//
// Requires: file handle f open for writing; globals $samples,
//   coeff_<method>, <method>_vars and full_controls_<samp>; variables
//   samp_cond_<method> and <samp> in memory, where <method> is the leading
//   letters of the sample name (rdd2020 -> rdd). "iv" calls ivreg2, which is
//   a user-written command and must be installed.
// Side effects: writes to f, stores estimates as e_<var>, sets the globals
//   N_<samp>, and asserts that Ns within a panel/sample differ by less than 2.
cap program drop write_rows
program define write_rows
	local outcomes "`1'"
	local reg_samples "`2'"
	local reg "`3'"

	// Fail early with a clear message instead of a cryptic error further down
	if !inlist("`reg'", "ols", "iv") {
		di as error `"write_rows: estimator must be "ols" or "iv", got "`reg'""'
		exit 198
	}

	// Start each panel with blank sample sizes so the N row never shows a
	// value left over from an earlier call
	foreach samp in $samples {
		global N_`samp' ""
	}

	foreach var in `outcomes' {

		file write f "`: var la `var''"

		foreach samp in $samples {
			// Whole-word membership test (a substring test would let one
			// sample name match inside another)
			local requested : list samp in reg_samples

			if !`requested' {
				* Sample not requested for this panel: blank cells
				file write f "& & "
			}
			else if "`var'" == "placed_any" & "`samp'" == "did" {
				* No DiD column for the placement outcome
				file write f "& &"
			}
			else {
				// Method = leading letters of the sample name. Parsed in
				// macro space so no scratch variables touch the dataset.
				local method ""
				if regexm("`samp'","([a-z]+)([0-9]+)?") {
					local method = regexs(1)
				}

				if "`reg'" == "ols" {
					qui reg `var' ${coeff_`method'} ${`method'_vars} ///
						${full_controls_`samp'}  ///
						if samp_cond_`method' == 1 & `samp' == 1 ///
						, r
					local coef_name "${coeff_`method'}"
				}
				else {
					// IV: modify_ever instrumented by the coefficient of interest
					qui ivreg2 `var' (modify_ever= ${coeff_`method'}) ///
						${`method'_vars} ///
						if !mi(app_risk) & `samp' == 1 ///
						, r
					local coef_name "modify_ever"
				}

				local coeff = _b[`coef_name']
				local se = _se[`coef_name']
				local n_new = e(N)
				est sto e_`var'

				// Estimates are always written; no degrees-of-freedom
				// condition is applied.
				* Coeff
				file write f "&" %4.3fc (`coeff')
				* Std. Err
				file write f "& (" %4.3fc (`se') ")"

				* Check that all regressions within the same panel and sample
				* have similar N. Tracked per sample, so a skipped first row
				* cannot trigger a comparison against an empty or stale value.
				if "`n_seen_`samp''" != "" {
					assert abs(`n_seen_`samp'' - `n_new') < 2
				}
				local n_seen_`samp' = `n_new'
				global N_`samp' = `n_new'
				di ${N_`samp'}
			}
		} // samp

		file write f "\\ "_n

	} // var

	* Write N row (N goes in the second column of each sample's pair)
	file write f "\textit{N} "
	foreach samp in $samples {
		file write f "& & ${N_`samp'}"
	}

	file write f "\\ \\[-1.0em]"_n

end

//
// Make Table
//

// Restrict to the excl_hsresident == 1 subsample for everything that follows
keep if excl_hsresident == 1

// Release handle f in case an earlier run left it open, then start the file
capture file close f
file open f using "$tables/rdd-regressions-main-excl_hsresident.tex", write replace

// Preamble: one label column plus four centred columns
file write f "\begin{tabular}{l*{4}{c}}" _n ///
	"\toprule \\[-1.0em]" _n

// Header row 1: one two-column group per design
file write f "& \multicolumn{2}{c}{RD} & \multicolumn{2}{c}{Diff. in Diff.} \\" _n ///
	"\cmidrule(lr){2-3} \cmidrule(lr){4-5}" _n

// Header row 2: estimate / standard error under each group
file write f "Outcome"
foreach design in rd did {
	file write f " & $\beta$ & SE "
}
file write f "\\" _n ///
	"\midrule \\[-1.0em]" _n

// Panel A: pre-treatment covariates, both samples
file write f "\emph{A. Demographics}\\"_n
write_rows "$pretreatment" "$samples" "ols"

// Panel B: simulator engagement, RD sample only (DiD cells left blank)
file write f "\emph{B. Interaction with Simulator}\\"_n
write_rows "$simulator_outcomes" "rdd2020" "ols"

// Panel C: choice outcomes, both samples
file write f "\emph{C. Choice Outcomes}\\"_n
write_rows "$choice_outcomes" "$samples" "ols"

// Close the tabular (no trailing newline after \end{tabular})
file write f "\bottomrule" _n ///
	"\end{tabular}"

capture file close f

// Write the table note to its own LaTeX fragment.
// Release handle f first in case it is still open.
cap file close f 

file open f using "$notes/rdd-regressions-main-excl_hsresident-note.tex", write replace

// The note is written as one unbroken line (no newlines between the pieces).
// Wording fixes: "1\% or more than 99\%" (was "of") and "i.e., those that".
file write f "\floatfoot{\footnotesize\textit{Notes.} RD and difference-in-differences estimates of the effects of the New Haven,  " 
file write f "CT warnings intervention. The samples for these regressions consist of the universe of "
file write f "applicants to grades PreK, and K in the NHPS simulator study i.e., those "
file write f "that have been randomized into either control or one of the two treatment "
file write f "groups or the equivalent comparison group in the 2019 application process. "
file write f "RD specifications are based on local linear fit, dropping observations with  "
file write f "predicted portfolio risk of less than 1\% or more than 99\%. For the  "
file write f "difference-in-differences panel, no observations are dropped based on their risk score. "
file write f "Robust SEs in parentheses. See section \ref{sec:NH} for details.}"

file close f